package cn.edu.bjtu.model.word2vec.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
/**
 * L2-normalizes the TF-IDF vector stored on each line of a text file.
 *
 * <p>Every input line has the form {@code key<TAB>v1 v2 ... vn}. The matching output line keeps
 * the key and replaces each value with {@code vi / sqrt(v1^2 + ... + vn^2)}.
 *
 * @author liyao
 * @since 2017-04-20
 */
public class NormalizedTFIDF {
    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "/home/liyao/tools/TBD/backup/0421/files/tfidf.txt";

    /** Output file used when no path is supplied on the command line. */
    private static final String DEFAULT_OUTPUT_PATH =
            "/home/liyao/tools/TBD/backup/0421/files/normalizedtf.txt";

    /** Charset used for both reading and writing, so keys survive the round trip unchanged. */
    private static final String CHARSET = "utf-8";

    /**
     * Normalizes a TF-IDF file.
     *
     * @param args optional {@code [inputPath [outputPath]]}; missing entries fall back to the
     *             built-in default paths
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;
        try {
            readVocab(new File(inputPath), new File(outputPath));
        } catch (IOException e) {
            System.err.println("Failed to normalize " + inputPath + " into " + outputPath);
            e.printStackTrace();
            // Report the failure to the calling shell instead of exiting with status 0.
            System.exit(1);
        }
    }

    /**
     * Reads TF-IDF vectors from {@code inPutFile}, L2-normalizes each one and writes the result
     * to {@code outputFile}, one vector per line. Blank lines are skipped.
     *
     * @param inPutFile  file whose lines look like {@code key<TAB>v1 v2 ... vn}
     * @param outputFile file that receives the normalized lines; overwritten if it exists
     * @throws IOException if either file cannot be accessed, or if a non-blank line has no
     *                     tab-separated values field or contains a value that is not a number
     */
    private static void readVocab(File inPutFile, File outputFile) throws IOException {
        // The reader is opened first so that an unreadable input never truncates the output
        // file; both streams are closed (and the writer flushed) on every exit path.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(inPutFile), CHARSET));
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(outputFile), CHARSET))) {
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    continue;
                }
                String normalized;
                try {
                    normalized = normalizeLine(line);
                } catch (IllegalArgumentException e) {
                    // NumberFormatException is an IllegalArgumentException, so bad numbers
                    // and missing fields are both reported with their line number.
                    throw new IOException("Malformed line " + lineNumber + " in " + inPutFile
                            + ": " + e.getMessage(), e);
                }
                writer.write(normalized);
                writer.newLine();
            }
        }
    }

    /**
     * Converts one {@code key<TAB>v1 v2 ... vn} line into its L2-normalized form.
     *
     * @param line a non-blank input line
     * @return the key, a tab, and the normalized values separated by single spaces
     * @throws IllegalArgumentException if the line has no values field or a value is not a number
     */
    private static String normalizeLine(String line) {
        String[] fields = line.split("\t");
        if (fields.length < 2) {
            throw new IllegalArgumentException("expected \"key<TAB>values\" but found no values field");
        }

        String[] tokens = fields[1].split(" ");
        double[] values = new double[tokens.length];
        double sumOfSquares = 0;
        for (int i = 0; i < tokens.length; i++) {
            values[i] = Double.parseDouble(tokens[i]);
            sumOfSquares += Math.pow(values[i], 2);
        }
        double norm = Math.sqrt(sumOfSquares);

        StringBuilder result = new StringBuilder(fields[0]).append('\t');
        for (int i = 0; i < values.length; i++) {
            if (i > 0) {
                result.append(' ');
            }
            // An all-zero vector has no direction; keep it as zeros instead of emitting NaN.
            result.append(norm == 0 ? values[i] : values[i] / norm);
        }
        return result.toString();
    }
}
